#include <stdio.h>

#include <stdlib.h>

#include <math.h>

#include <time.h>

#include <sys/time.h>

double randlc(double * x, double a);
void vranlc(int n, double * x, double a, double y[]);
void timer_clear(int n);
void timer_start(int n);
void timer_stop(int n);
double timer_read(int n);
enum NAS_MG_B_c_16736 { false, true };
typedef enum NAS_MG_B_c_16736 logical;
struct named_NAS_MG_B_c_16750
{
	double real;
	double imag;
};

typedef struct named_NAS_MG_B_c_16750 dcomplex;
void print_results(char * name, char class, int n1, int n2, int n3, int niter, double t, double mops, char * optype, logical verified, char * npbversion, char * compiletime, char * cs1, char * cs2, char * cs3, char * cs4, char * cs5, char * cs6, char * cs7);
static logical timeron;
static void setup(int * n1, int * n2, int * n3);
static void mg3P(double u[], double v[], double r[], double a[4], double c[4], int n1, int n2, int n3);
static void psinv(void * or, void * ou, int n1, int n2, int n3, double c[4], int k);
static void resid(void * ou, void * ov, void * or, int n1, int n2, int n3, double a[4], int k);
static void rprj3(void * or, int m1k, int m2k, int m3k, void * os, int m1j, int m2j, int m3j, int k);
static void interp(void * oz, int mm1, int mm2, int mm3, void * ou, int n1, int n2, int n3, int k);
static void norm2u3(void * or, int n1, int n2, int n3, double * rnm2, double * rnmu, int nx, int ny, int nz);
static void rep_nrm(void * u, int n1, int n2, int n3, char * title, int kk);
static void comm3(void * ou, int n1, int n2, int n3, int kk);
static void zran3(void * oz, int n1, int n2, int n3, int nx, int ny, int k);
static void showall(void * oz, int n1, int n2, int n3);
static double power(double a, int n);
static void bubble(double ten[][2], int j1[][2], int j2[][2], int j3[][2], int m, int ind);
static void zero3(void * oz, int n1, int n2, int n3);
static double u[(((((((((1*(2+(1<<8)))*(2+(1<<8)))*(2+(1<<8)))+((2+(1<<8))*(2+(1<<8))))+(5*(2+(1<<8))))+(7*8))+6)/7)*8)];
static double v[(((((((((1*(2+(1<<8)))*(2+(1<<8)))*(2+(1<<8)))+((2+(1<<8))*(2+(1<<8))))+(5*(2+(1<<8))))+(7*8))+6)/7)*8)];
static double r[(((((((((1*(2+(1<<8)))*(2+(1<<8)))*(2+(1<<8)))+((2+(1<<8))*(2+(1<<8))))+(5*(2+(1<<8))))+(7*8))+6)/7)*8)];
static int is1, is2, is3, ie1, ie2, ie3;
static int nx[((8+1)+1)];
static int ny[((8+1)+1)];
static int nz[((8+1)+1)];
static char Class;
static int debug_vec[8];
static int m1[((8+1)+1)];
static int m2[((8+1)+1)];
static int m3[((8+1)+1)];
static int ir[((8+1)+1)];
static int lt, lb;


#include <omp.h>
#include <time.h>
#include <sys/time.h>
#include <string.h>
#include <unistd.h>
#include <math.h>
// ANSI escape sequences used to colour the diagnostic messages of the capture/replay harness.
#define RED  "\x1B[31m"
#define GRN  "\x1B[32m"
#define MAG  "\x1B[35m"
#define RESET "\x1B[0m"
//When the Initial directive is defined, the original code runs in order to save the variables' and timers' values.
#define Initial
//Once the Experimental directive is defined, the Experimental section runs and the results before and after
//the modification of this code section get compared. Defining Experimental also undefines Initial (see below).
#define Experimental
#ifdef Experimental
#undef Initial
#endif //Experimental
//Evaluates to 1 when a and b differ by less than 0.001 and to 0 otherwise.
//The expansion and both arguments are fully parenthesized so that the macro composes correctly with
//!, && and || and with arguments that are themselves expressions. The previous form expanded to a bare
//"cond ? 1 : 0", so "closeEnough(x, y) && z" was parsed as "cond ? 1 : (0 && z)" and z was ignored.
#define closeEnough(a, b) (fabs((a) - (b)) < 0.001)
//Selects which execution of the experimental section has its input and output state saved to a file:
//the capture code runs when iteration_count equals iterationNum.
//Normalized loop iteration numbers begin at 0, so a value of 0 selects the first execution.
#define iterationNum 10

//Set the verbosity based on how detailed you want the results:
//The initial value of verbosity is set based on how it is set in the Cetus run
//#define verbosity 0 shows the results of the comparison of the variables,
//#define verbosity 1 shows the execution time of the experimental section, the result of the comparison, and the value of the variables,
//#define verbosity 2 shows the reading/writing state of variables.
#define verbosity 0

void read_var_from_file(char *varName,void* var, size_t sizeOfType, size_t numElements, FILE* fp){
	if(fread(var, sizeOfType, numElements, fp) != numElements) {
			if(feof(fp))
				printf("%sPremature end of file.%s\n",RED,RESET);
			else
				printf("%sFile read error of var %s %s\n",RED,varName,RESET);
	}else if(verbosity > 1) {
			printf("%sThe value of variable %s is read from the file.%s\n",GRN,varName,RESET);
	     }
}

void write_var_to_file(char *varName, void* var, size_t sizeOfType, size_t numElements, FILE* fp){
	if(fwrite(var, sizeOfType, numElements, fp) != numElements) {
			if(feof(fp))
				printf("%sPremature end of file.%s\n",RED,RESET);
			else
				printf("%sFile write error of var %s %s\n",RED,varName,RESET);
	}else if(verbosity > 1) {
			printf("%sThe value of variable %s is written to the file.%s\n",GRN,varName,RESET);
	     }
}

#ifdef Experimental
/*
 * Compares an int variable stored in two binary state files and prints MATCH or MISMATCH.
 *
 * initialOutputStateFile, modifiedOutputStateFile:
 *     Although declared as FILE *, both arguments are handed to fopen() as file names, so they
 *     must point to NUL-terminated path strings. The signature is kept for the existing callers.
 *     NOTE(review): consider changing the parameter type to const char * once all callers are known.
 * varName:      name used in the messages.
 * sizeOfType:   size in bytes of one stored element (expected to be sizeof(int)).
 * numElements:  number of elements to compare.
 * varType:      unused.
 * filePointer:  byte offset of the variable inside both files.
 *
 * Only elements that were actually read from both files are compared. If the two files yield a
 * different number of elements, that is reported as a mismatch at the first missing element.
 * All streams and buffers are released on every path.
 */
void compare_two_int_variables_in_binary_files(FILE *initialOutputStateFile,FILE *modifiedOutputStateFile, char *varName, size_t sizeOfType, size_t numElements,char *varType, int filePointer){
	// Size the buffers for the larger of the stored and the in-memory element size so that neither
	// fread() nor the int-wise comparison can run past the allocation. calloc() also checks the
	// multiplication for overflow and zero-fills the part a short read leaves untouched.
	size_t slotSize = sizeOfType > sizeof(int) ? sizeOfType : sizeof(int);
	size_t slotCount = numElements > 0 ? numElements : 1;
	int *tmp1 = calloc(slotCount, slotSize);
	int *tmp2 = calloc(slotCount, slotSize);
	FILE *initialFile = NULL;
	FILE *modifiedFile = NULL;
	size_t n1, n2, n_min, i;

	(void)varType;
	if(tmp1 == NULL || tmp2 == NULL){
		printf("%sError: Failed to allocate the comparison buffers for variable %s.%s\n", RED, varName, RESET);
		goto cleanup;
	}
	initialFile = fopen((const char *)initialOutputStateFile, "rb");
	if(initialFile == NULL){
		printf("%sError: Failed to open initialOutputStateFile.%s\n", RED, RESET);
		goto cleanup;
	}
	modifiedFile = fopen((const char *)modifiedOutputStateFile, "rb");
	if(modifiedFile == NULL){
		printf("%sError: Failed to open modifiedOutputStateFile.%s\n", RED, RESET);
		goto cleanup;
	}
	//sets the file pointer at the right position
	if(fseek(initialFile, filePointer, SEEK_SET) != 0){
		printf("%sError: Failed to set the file pointer for initialOutputStateFile.%s\n", RED, RESET);
		goto cleanup;
	}
	if(fseek(modifiedFile, filePointer, SEEK_SET) != 0){
		printf("%sError: Failed to set the file pointer for modifiedOutputStateFile.%s\n", RED, RESET);
		goto cleanup;
	}
	n1 = fread(tmp1, sizeOfType, numElements, initialFile);
	if(n1 < numElements && ferror(initialFile)){
		printf("%sError: Failed to read from initialOutputStateFile. %s\n ",RED,RESET );
		goto cleanup;
	}
	if(verbosity > 1) {printf("%sinitialOutputStateFile is read successfully.%s\n",GRN,RESET); }
	n2 = fread(tmp2, sizeOfType, numElements, modifiedFile);
	if(n2 < numElements && ferror(modifiedFile)){
		printf("%sError: Failed to read from modifiedOutputStateFile.%s\n",RED,RESET);
		goto cleanup;
	}
	if(verbosity > 1) {printf("%smodifiedOutputStateFile is read successfully.%s\n",GRN,RESET); }

	// Find the first differing element among those present in both files.
	n_min = n1 < n2 ? n1 : n2;
	for(i = 0; i < n_min && tmp1[i] == tmp2[i]; i++){
		;
	}
	if(i < n_min || n1 != n2){
		if(numElements == 1){
			printf("%sMISMATCH: The value of variable %s before and after modification differs.%s\n", MAG, varName , RESET);
		}else{
			printf("%sMISMATCH: The value of variable %s before and after modification differs starting from element %zu.%s\n",MAG,varName,i, RESET);
		}
	}else{
		printf("%sMATCH: The value of variable %s before and after modification is equal.%s\n",GRN,varName,RESET);
	}

cleanup:
	if(modifiedFile != NULL){
		fclose(modifiedFile);
	}
	if(initialFile != NULL){
		fclose(initialFile);
	}
	free(tmp2);
	free(tmp1);
}

			/*
			 * Compares a double variable stored in two binary state files and prints MATCH or MISMATCH.
			 *
			 * initialOutputStateFile, modifiedOutputStateFile:
			 *     Although declared as FILE *, both arguments are handed to fopen() as file names, so
			 *     they must point to NUL-terminated path strings. The signature is kept for the callers.
			 * varName:      name used in the messages.
			 * sizeOfType:   size in bytes of one stored element (expected to be sizeof(double)).
			 * numElements:  number of elements to compare.
			 * varType:      unused.
			 * filePointer:  byte offset of the variable inside both files.
			 *
			 * Two elements are considered equal when they compare equal, when both are NaN, or when
			 * closeEnough() accepts them. MATCH is printed only after every element read from both
			 * files has passed that test; the first failing element is reported as the mismatch.
			 * All streams and buffers are released on every path.
			 */
			void compare_two_double_variables_in_binary_files(FILE *initialOutputStateFile,FILE *modifiedOutputStateFile, char *varName, size_t sizeOfType, size_t numElements,char *varType, int filePointer){
				// Buffers are sized for the larger of the stored and the in-memory element size so that
				// neither fread() nor the element-wise comparison can run past the allocation.
				size_t slotSize = sizeOfType > sizeof(double) ? sizeOfType : sizeof(double);
				size_t slotCount = numElements > 0 ? numElements : 1;
				double *tmp1 = calloc(slotCount, slotSize);
				double *tmp2 = calloc(slotCount, slotSize);
				FILE *initialFile = NULL;
				FILE *modifiedFile = NULL;
				size_t n1, n2, n_min, i;

				(void)varType;
				if(tmp1 == NULL || tmp2 == NULL){
					printf("%sError: Failed to allocate the comparison buffers for variable %s.%s\n", RED, varName, RESET);
					goto cleanup;
				}
				initialFile = fopen((const char *)initialOutputStateFile, "rb");
				if(initialFile == NULL){
					printf("%sError: Failed to open initialOutputStateFile.%s\n", RED, RESET);
					goto cleanup;
				}
				modifiedFile = fopen((const char *)modifiedOutputStateFile, "rb");
				if(modifiedFile == NULL){
					printf("%sError: Failed to open modifiedOutputStateFile.%s\n", RED, RESET);
					goto cleanup;
				}
				if(fseek(initialFile, filePointer, SEEK_SET) != 0){
					printf("%sError: Failed to set the file pointer for initialOutputStateFile.%s\n", RED, RESET);
					goto cleanup;
				}
				if(fseek(modifiedFile, filePointer, SEEK_SET) != 0){
					printf("%sError: Failed to set the file pointer for modifiedOutputStateFile.%s\n", RED, RESET);
					goto cleanup;
				}
				n1 = fread(tmp1, sizeOfType, numElements, initialFile);
				if(n1 < numElements && ferror(initialFile)){
					printf("%sError: Failed to read from initialOutputStateFile. %s\n ",RED,RESET );
					goto cleanup;
				}
				if(verbosity > 1) {printf("%sinitialOutputStateFile is read successfully.%s\n",GRN,RESET); }
				n2 = fread(tmp2, sizeOfType, numElements, modifiedFile);
				if(n2 < numElements && ferror(modifiedFile)){
					printf("%sError: Failed to read from modifiedOutputStateFile.%s\n",RED,RESET);
					goto cleanup;
				}
				if(verbosity > 1) {printf("%smodifiedOutputStateFile is read successfully.%s\n",GRN,RESET); }

				// Stop at the first element that is neither equal, nor a NaN pair, nor within tolerance.
				// The macro call is wrapped in its own parentheses so the test does not depend on how
				// closeEnough() parenthesizes its expansion.
				n_min = n1 < n2 ? n1 : n2;
				for(i = 0; i < n_min; i++){
					int bothNan = isnan(tmp1[i]) && isnan(tmp2[i]);
					if(tmp1[i] != tmp2[i] && !bothNan && !(closeEnough(tmp1[i],tmp2[i]))){
						break;
					}
				}
				if(i < n_min || n1 != n2){
					if(numElements == 1){
						printf("%sMISMATCH: The value of variable %s before and after modification differs.%s\n", MAG, varName , RESET);
					}else{
						printf("%sMISMATCH: The value of variable %s before and after modification differs starting from element %zu.%s\n",MAG,varName,i, RESET);
					}
				}else{
					printf("%sMATCH: The value of variable %s before and after modification is equal.%s\n",GRN,varName,RESET);
				}

			cleanup:
				if(modifiedFile != NULL){
					fclose(modifiedFile);
				}
				if(initialFile != NULL){
					fclose(initialFile);
				}
				free(tmp2);
				free(tmp1);
			}

					/*
					 * Compares a float variable stored in two binary state files and prints MATCH or MISMATCH.
					 *
					 * initialOutputStateFile, modifiedOutputStateFile:
					 *     Although declared as FILE *, both arguments are handed to fopen() as file names, so
					 *     they must point to NUL-terminated path strings. The signature is kept for the callers.
					 * varName:      name used in the messages.
					 * sizeOfType:   size in bytes of one stored element (expected to be sizeof(float)).
					 * numElements:  number of elements to compare.
					 * varType:      unused.
					 * filePointer:  byte offset of the variable inside both files.
					 *
					 * Two elements are considered equal when they compare equal, when both are NaN, or when
					 * closeEnough() accepts them. MATCH is printed only after every element read from both
					 * files has passed that test; the first failing element is reported as the mismatch.
					 * All streams and buffers are released on every path.
					 */
					void compare_two_float_variables_in_binary_files(FILE *initialOutputStateFile,FILE *modifiedOutputStateFile, char *varName, size_t sizeOfType, size_t numElements,char *varType, int filePointer){
						// Buffers are sized for the larger of the stored and the in-memory element size so that
						// neither fread() nor the element-wise comparison can run past the allocation.
						size_t slotSize = sizeOfType > sizeof(float) ? sizeOfType : sizeof(float);
						size_t slotCount = numElements > 0 ? numElements : 1;
						float *tmp1 = calloc(slotCount, slotSize);
						float *tmp2 = calloc(slotCount, slotSize);
						FILE *initialFile = NULL;
						FILE *modifiedFile = NULL;
						size_t n1, n2, n_min, i;

						(void)varType;
						if(tmp1 == NULL || tmp2 == NULL){
							printf("%sError: Failed to allocate the comparison buffers for variable %s.%s\n", RED, varName, RESET);
							goto cleanup;
						}
						initialFile = fopen((const char *)initialOutputStateFile, "rb");
						if(initialFile == NULL){
							printf("%sError: Failed to open initialOutputStateFile.%s\n", RED, RESET);
							goto cleanup;
						}
						modifiedFile = fopen((const char *)modifiedOutputStateFile, "rb");
						if(modifiedFile == NULL){
							printf("%sError: Failed to open modifiedOutputStateFile.%s\n", RED, RESET);
							goto cleanup;
						}
						if(fseek(initialFile, filePointer, SEEK_SET) != 0){
							printf("%sError: Failed to set the file pointer for initialOutputStateFile.%s\n", RED, RESET);
							goto cleanup;
						}
						if(fseek(modifiedFile, filePointer, SEEK_SET) != 0){
							printf("%sError: Failed to set the file pointer for modifiedOutputStateFile.%s\n", RED, RESET);
							goto cleanup;
						}
						n1 = fread(tmp1, sizeOfType, numElements, initialFile);
						if(n1 < numElements && ferror(initialFile)){
							printf("%sError: Failed to read from initialOutputStateFile. %s\n ",RED,RESET );
							goto cleanup;
						}
						if(verbosity > 1) {printf("%sinitialOutputStateFile is read successfully.%s\n",GRN,RESET); }
						n2 = fread(tmp2, sizeOfType, numElements, modifiedFile);
						if(n2 < numElements && ferror(modifiedFile)){
							printf("%sError: Failed to read from modifiedOutputStateFile.%s\n",RED,RESET);
							goto cleanup;
						}
						if(verbosity > 1) {printf("%smodifiedOutputStateFile is read successfully.%s\n",GRN,RESET); }

						// Stop at the first element that is neither equal, nor a NaN pair, nor within tolerance.
						// The macro call is wrapped in its own parentheses so the test does not depend on how
						// closeEnough() parenthesizes its expansion.
						n_min = n1 < n2 ? n1 : n2;
						for(i = 0; i < n_min; i++){
							int bothNan = isnan(tmp1[i]) && isnan(tmp2[i]);
							if(tmp1[i] != tmp2[i] && !bothNan && !(closeEnough(tmp1[i],tmp2[i]))){
								break;
							}
						}
						if(i < n_min || n1 != n2){
							if(numElements == 1){
								printf("%sMISMATCH: The value of variable %s before and after modification differs.%s\n", MAG, varName , RESET);
							}else{
								printf("%sMISMATCH: The value of variable %s before and after modification differs starting from element %zu.%s\n",MAG,varName,i, RESET);
							}
						}else{
							printf("%sMATCH: The value of variable %s before and after modification is equal.%s\n",GRN,varName,RESET);
						}

					cleanup:
						if(modifiedFile != NULL){
							fclose(modifiedFile);
						}
						if(initialFile != NULL){
							fclose(initialFile);
						}
						free(tmp2);
						free(tmp1);
					}
							#endif // Experimental

							// Timing state of the capture/replay harness. Each pair of timeval values brackets one
							// phase and the double next to it holds that phase's duration.
							// NOTE(review): most of these are not assigned in the visible part of the file — confirm
							// their use against the experimental section.
							struct timeval startexp, endexp;
							double exp_time_used;
							// Printed by main() as the execution time of the experimental section before modification.
							double saved_exp_time_used;
							// Bracket the whole run of main(); prg_time_used is their difference in seconds.
							struct timeval prgstart, prgend;
							double prg_time_used;
							// Capture phase: writing the input state and the output state. main() reports the sum of
							// capture_input_write_time and capture_output_write_time as the capture overhead.
							struct timeval captureStartInputWrite, captureEndInputWrite;
							double capture_input_write_time;
							struct timeval captureStartOutputWrite, captureEndOutputWrite;
							double capture_output_write_time;
							// Replay phase timers (presumably reading the input and writing/comparing the output — TODO confirm).
							struct timeval replayStartInputRead, replayEndInputRead;
							double replay_input_read_time;
							struct timeval replayStartOutputWriteCompare, replayEndOutputWriteCompare;
							double replay_output_write_time;
							int iteration_count =0; //Counts the number of iterations the experimental section goes through; compared against iterationNum to pick the captured one.
							// File-scope return-value slot; the main() visible here declares its own local of the same name.
							int _ret_val_0;

							// Streams of the state files; initialInputStateFile is opened for writing in the capture code.
							FILE* initialInputStateFile;
							FILE* initialOutputStateFile;
							FILE* modifiedOutputStateFile;

							#ifdef Initial
							int main()
							{

								omp_set_num_threads(4);
								gettimeofday(&prgstart, NULL);


								int k, it;
								double t, tinit, mflops;
								double a[4], c[4];
								double rnm2, rnmu, old2, oldu, epsilon;
								int n1, n2, n3, nit;
								double nn, verify_value, err;
								logical verified;
								int i;
								char * t_names[10];
								double tmax;
								FILE * fp;
								int _ret_val_0;
								#pragma loop name main#0

								#pragma cetus private(i)
								for (i=0; i<10; i ++ )
								{
									timer_clear(i);
								}
								timer_start(0);
								if ((fp=fopen("timer.flag", "r"))!=((void * )0))
								{
									timeron=true;
									t_names[0]="init";
									t_names[1]="benchmk";
									t_names[2]="mg3P";
									t_names[3]="psinv";
									t_names[4]="resid";
									t_names[6]="rprj3";
									t_names[7]="interp";
									t_names[8]="norm2";
									t_names[9]="comm3";
									fclose(fp);
								}
								else
								{
									timeron=false;
								}
								printf("\n\n NAS Parallel Benchmarks (NPB3.3-SER-C) - MG Benchmark\n\n");
								if ((fp=fopen("mg.input", "r"))!=((void * )0))
								{
									int result;
									printf(" Reading from input file mg.input\n");
									result=fscanf(fp, "%d\n",  & lt);
									while (fgetc(fp)!='\n')
									{
										;
									}
									result=fscanf(fp, "%d%d%d",  & nx[lt],  & ny[lt],  & nz[lt]);
									while (fgetc(fp)!='\n')
									{
										;
									}
									result=fscanf(fp, "%d",  & nit);
									while (fgetc(fp)!='\n')
									{
										;
									}
									#pragma loop name main#1

									#pragma cetus private(i, result)
									for (i=0; i<=7; i ++ )
									{
										result=fscanf(fp, "%d",  & debug_vec[i]);
									}
									fclose(fp);
								}
								else
								{
									printf(" No input file. Using compiled defaults \n");
									lt=8;
									nit=20;
									nx[lt]=256;
									ny[lt]=256;
									nz[lt]=256;
									#pragma loop name main#2

									#pragma cetus private(i)
									#pragma cetus parallel
									for (i=0; i<=7; i ++ )
									{
										debug_vec[i]=0;
									}
								}
								if ((nx[lt]!=ny[lt])||(nx[lt]!=nz[lt]))
								{
									Class='U';
								}
								else
								{
									if ((nx[lt]==32)&&(nit==4))
									{
										Class='S';
									}
									else
									{
										if ((nx[lt]==128)&&(nit==4))
										{
											Class='W';
										}
										else
										{
											if ((nx[lt]==256)&&(nit==4))
											{
												Class='A';
											}
											else
											{
												if ((nx[lt]==256)&&(nit==20))
												{
													Class='B';
												}
												else
												{
													if ((nx[lt]==512)&&(nit==20))
													{
														Class='C';
													}
													else
													{
														if ((nx[lt]==1024)&&(nit==50))
														{
															Class='D';
														}
														else
														{
															if ((nx[lt]==2048)&&(nit==50))
															{
																Class='E';
															}
															else
															{
																Class='U';
															}
														}
													}
												}
											}
										}
									}
								}
								a[0]=(( - 8.0)/3.0);
								a[1]=0.0;
								a[2]=(1.0/6.0);
								a[3]=(1.0/12.0);
								if (((Class=='A')||(Class=='S'))||(Class=='W'))
								{
									c[0]=(( - 3.0)/8.0);
									c[1]=(( + 1.0)/32.0);
									c[2]=(( - 1.0)/64.0);
									c[3]=0.0;
								}
								else
								{
									c[0]=(( - 3.0)/17.0);
									c[1]=(( + 1.0)/33.0);
									c[2]=(( - 1.0)/61.0);
									c[3]=0.0;
								}
								lb=1;
								k=lt;
								setup( & n1,  & n2,  & n3);
								zero3(u, n1, n2, n3);
								zran3(v, n1, n2, n3, nx[lt], ny[lt], k);
								norm2u3(v, n1, n2, n3,  & rnm2,  & rnmu, nx[lt], ny[lt], nz[lt]);
								printf(" Size: %4dx%4dx%4d  (class %c)\n", nx[lt], ny[lt], nz[lt], Class);
								printf(" Iterations: %3d\n", nit);
								printf("\n");
								resid(u, v, r, n1, n2, n3, a, k);
								norm2u3(r, n1, n2, n3,  & rnm2,  & rnmu, nx[lt], ny[lt], nz[lt]);
								old2=rnm2;
								oldu=rnmu;
								mg3P(u, v, r, a, c, n1, n2, n3);
								resid(u, v, r, n1, n2, n3, a, k);
								setup( & n1,  & n2,  & n3);
								zero3(u, n1, n2, n3);
								zran3(v, n1, n2, n3, nx[lt], ny[lt], k);
								timer_stop(0);
								tinit=timer_read(0);
								printf(" Initialization time: %15.3f seconds\n\n", tinit);
								#pragma loop name main#3

								#pragma cetus private(i)
								for (i=1; i<10; i ++ )
								{
									timer_clear(i);
								}
								timer_start(1);
								if (timeron)
								{
									timer_start(5);
								}
								resid(u, v, r, n1, n2, n3, a, k);
								if (timeron)
								{
									timer_stop(5);
								}
								norm2u3(r, n1, n2, n3,  & rnm2,  & rnmu, nx[lt], ny[lt], nz[lt]);
								old2=rnm2;
								oldu=rnmu;
								#pragma loop name main#4

								#pragma cetus private(it)
								for (it=1; it<=nit; it ++ )
								{
									if (((it==1)||(it==nit))||((it%5)==0))
									{
										printf("  iter %3d\n", it);
									}
									if (timeron)
									{
										timer_start(2);
									}
									mg3P(u, v, r, a, c, n1, n2, n3);
									if (timeron)
									{
										timer_stop(2);
									}
									if (timeron)
									{
										timer_start(5);
									}
									resid(u, v, r, n1, n2, n3, a, k);
									if (timeron)
									{
										timer_stop(5);
									}
								}
								norm2u3(r, n1, n2, n3,  & rnm2,  & rnmu, nx[lt], ny[lt], nz[lt]);
								timer_stop(1);
								t=timer_read(1);
								verified=false;
								verify_value=0.0;
								printf("\n Benchmark completed\n");
								epsilon=1.0E-8;
								if (Class!='U')
								{
									if (Class=='S')
									{
										verify_value=5.307707005734E-5;
									}
									else
									{
										if (Class=='W')
										{
											verify_value=6.467329375339E-6;
										}
										else
										{
											if (Class=='A')
											{
												verify_value=2.433365309069E-6;
											}
											else
											{
												if (Class=='B')
												{
													verify_value=1.800564401355E-6;
												}
												else
												{
													if (Class=='C')
													{
														verify_value=5.70673228574E-7;
													}
													else
													{
														if (Class=='D')
														{
															verify_value=1.58327506044E-10;
														}
														else
														{
															if (Class=='E')
															{
																verify_value=8.157592357404E-11;
															}
														}
													}
												}
											}
										}
									}
									err=(fabs(rnm2-verify_value)/verify_value);
									if (err<=epsilon)
									{
										verified=true;
										printf(" VERIFICATION SUCCESSFUL\n");
										printf(" L2 Norm is %20.13E\n", rnm2);
										printf(" Error is   %20.13E\n", err);
									}
									else
									{
										verified=false;
										printf(" VERIFICATION FAILED\n");
										printf(" L2 Norm is             %20.13E\n", rnm2);
										printf(" The correct L2 Norm is %20.13E\n", verify_value);
									}
								}
								else
								{
									verified=false;
									printf(" Problem size unknown\n");
									printf(" NO VERIFICATION PERFORMED\n");
									printf(" L2 Norm is %20.13E\n", rnm2);
								}
								nn=(((1.0*nx[lt])*ny[lt])*nz[lt]);
								if (t!=0.0)
								{
									mflops=((((58.0*nit)*nn)*1.0E-6)/t);
								}
								else
								{
									mflops=0.0;
								}
								print_results("MG", Class, nx[lt], ny[lt], nz[lt], nit, t, mflops, "          floating point", verified, "3.3.1", "05 Nov 2022", "gcc", "$(CC)", "-lm", "-I../common", "-g -Wall -O3 -mcmodel=medium", "-O3 -mcmodel=medium", "randdp");
								if (timeron)
								{
									tmax=timer_read(1);
									if (tmax==0.0)
									{
										tmax=1.0;
									}
									printf("  SECTION   Time (secs)\n");
									#pragma loop name main#5

									#pragma cetus private(i, t)
									for (i=1; i<10; i ++ )
									{
										t=timer_read(i);
										if (i==5)
										{
											t=(timer_read(4)-t);
											printf("    --> %8s:%9.3f  (%6.2f%%)\n", "mg-resid", t, (t*100.0)/tmax);
										}
										else
										{
											printf("  %-8s:%9.3f  (%6.2f%%)\n", t_names[i], t, (t*100.0)/tmax);
										}
									}
								}
								_ret_val_0=0;
								gettimeofday(&prgend, NULL);
								prg_time_used = (double) ((prgend.tv_sec * 1000000 + prgend.tv_usec) - (prgstart.tv_sec * 1000000 + prgstart.tv_usec)) / 1000000;
								printf("\nExecution time of the program in the capturing phase: %.6lf (seconds) \n", prg_time_used);
								printf("Overhead of the capturing phase: %.6lf (seconds) \n", capture_input_write_time+capture_output_write_time);
								printf("Execution time of the experimental section (before modification): %.6lf (seconds) \n", saved_exp_time_used);
							if(iteration_count>1){printf("the experimental section is iterated over %d times\n",iteration_count);}
								printf("\n\n%sUncomment the line: #define Experimental, make your changes to the experimental section then compile and run the code.%s\n",MAG ,RESET );

								return _ret_val_0;
							}

							#endif //Initial

							static void setup(int * n1, int * n2, int * n3)
							{
								int k, j;
								int ax, mi[((8+1)+1)][3];
								int ng[((8+1)+1)][3];
								ng[lt][0]=nx[lt];
								ng[lt][1]=ny[lt];
								ng[lt][2]=nz[lt];
								#pragma loop name setup#0

								#pragma cetus private(ax, k)
								for (k=(lt-1); k>=1; k -- )
								{
									#pragma loop name setup#0#0

									#pragma cetus private(ax)
									#pragma cetus parallel
									for (ax=0; ax<3; ax ++ )
									{
										ng[k][ax]=(ng[k+1][ax]/2);
									}
								}
								#pragma loop name setup#1

								#pragma cetus private(k)
								#pragma cetus parallel
								#pragma omp parallel for private(k)
								for (k=lt; k>=1; k -- )
								{
									nx[k]=ng[k][0];
									ny[k]=ng[k][1];
									nz[k]=ng[k][2];
								}
								#pragma loop name setup#2

								#pragma cetus private(ax, k)
								#pragma cetus parallel
								#pragma omp parallel for private(ax, k)
								for (k=lt; k>=1; k -- )
								{
									#pragma loop name setup#2#0

									#pragma cetus private(ax)
									for (ax=0; ax<3; ax ++ )
									{
										mi[k][ax]=(2+ng[k][ax]);
									}
									m1[k]=mi[k][0];
									m2[k]=mi[k][1];
									m3[k]=mi[k][2];
								}
								k=lt;
								is1=((2+ng[k][0])-ng[lt][0]);
								ie1=(1+ng[k][0]);
								( * n1)=((3+ie1)-is1);
								is2=((2+ng[k][1])-ng[lt][1]);
								ie2=(1+ng[k][1]);
								( * n2)=((3+ie2)-is2);
								is3=((2+ng[k][2])-ng[lt][2]);
								ie3=(1+ng[k][2]);
								( * n3)=((3+ie3)-is3);
								ir[lt]=0;
								#pragma loop name setup#3

								#pragma cetus private(j)
								for (j=(lt-1); j>=1; j -- )
								{
									ir[j]=(ir[j+1]+(((1*m1[j+1])*m2[j+1])*m3[j+1]));
								}
								if (debug_vec[1]>=1)
								{
									printf(" in setup, \n");
									printf(" k  lt  nx  ny  nz  n1  n2  n3 is1 is2 is3 ie1 ie2 ie3\n");
									printf("%4d%4d%4d%4d%4d%4d%4d%4d%4d%4d%4d%4d%4d%4d\n", k, lt, ng[k][0], ng[k][1], ng[k][2],  * n1,  * n2,  * n3, is1, is2, is3, ie1, ie2, ie3);
								}
								return ;
							}

							/*
							 * One multigrid cycle over levels lb..lt.
							 *
							 * Goes down from lt with rprj3() on r, handles level lb with zero3() and psinv(), then
							 * goes back up: every intermediate level is zeroed and passed through interp(), resid()
							 * and psinv(), and the top level does the same on the full u, v, r arrays without zeroing.
							 * Level k lives at offset ir[k] of u and r and has extents m1[k] x m2[k] x m3[k].
							 */
							static void mg3P(double u[], double v[], double r[], double a[4], double c[4], int n1, int n2, int n3)
							{
								int below, level;

								/* Downward sweep: from each level to the one below it. */
								#pragma loop name mg3P#0

								#pragma cetus private(below, level)
								for (level=lt; level>lb; level--)
								{
									below=level-1;
									rprj3(r+ir[level], m1[level], m2[level], m3[level], r+ir[below], m1[below], m2[below], m3[below], level);
								}

								/* Lowest level. */
								level=lb;
								zero3(u+ir[level], m1[level], m2[level], m3[level]);
								psinv(r+ir[level], u+ir[level], m1[level], m2[level], m3[level], c, level);

								/* Upward sweep over the intermediate levels. */
								#pragma loop name mg3P#1

								#pragma cetus private(below, level)
								for (level=lb+1; level<lt; level++)
								{
									double *u_here=u+ir[level];
									double *r_here=r+ir[level];
									below=level-1;
									zero3(u_here, m1[level], m2[level], m3[level]);
									interp(u+ir[below], m1[below], m2[below], m3[below], u_here, m1[level], m2[level], m3[level], level);
									resid(u_here, r_here, r_here, m1[level], m2[level], m3[level], a, level);
									psinv(r_here, u_here, m1[level], m2[level], m3[level], c, level);
								}

								/* Top level works on the whole arrays. */
								below=lt-1;
								level=lt;
								interp(u+ir[below], m1[below], m2[below], m3[below], u, n1, n2, n3, level);
								resid(u, v, r, n1, n2, n3, a, level);
								psinv(r, u, n1, n2, n3, c, level);
							}

							/*
							 * Updates every interior point of u in place by adding a weighted sum of r over the
							 * point's neighbourhood, with weights c[0], c[1] and c[2] (c[3] is not read).
							 *
							 * or and ou are viewed as [n3][n2][n1] arrays of double. After the sweep comm3() is
							 * called on u, and the debug flags may trigger rep_nrm() and showall(). k is forwarded
							 * to those calls.
							 */
							static void psinv(void * or, void * ou, int n1, int n2, int n3, double c[4], int k)
							{
								double (* r)[n2][n1] = (double (* )[n2][n1])or;
								double (* u)[n2][n1] = (double (* )[n2][n1])ou;
								int plane, row, col;
								/* Per-row scratch: sums of r over the four edge and the four diagonal neighbour rows. */
								double edge[((2+(1<<8))+1)], diag[((2+(1<<8))+1)];

								if (timeron)
								{
									timer_start(3);
								}
								#pragma loop name psinv#0

								#pragma cetus private(col, row, plane, edge, diag)
								#pragma cetus parallel
								#pragma omp parallel for private(col, row, plane, edge, diag)
								for (plane=1; plane<(n3-1); plane++)
								{
									#pragma loop name psinv#0#0

									#pragma cetus private(col, row)
									#pragma cetus lastprivate(edge, diag)
									for (row=1; row<(n2-1); row++)
									{
										/* Rows of r around (plane, row): lo/mid/hi plane, prev/mid/next row. */
										const double *lo_prev=r[plane-1][row-1];
										const double *lo_mid=r[plane-1][row];
										const double *lo_next=r[plane-1][row+1];
										const double *mid_prev=r[plane][row-1];
										const double *mid_mid=r[plane][row];
										const double *mid_next=r[plane][row+1];
										const double *hi_prev=r[plane+1][row-1];
										const double *hi_mid=r[plane+1][row];
										const double *hi_next=r[plane+1][row+1];
										double *out=u[plane][row];

										#pragma loop name psinv#0#0#0

										#pragma cetus private(col)
										for (col=0; col<n1; col++)
										{
											edge[col]=(((mid_prev[col]+mid_next[col])+lo_mid[col])+hi_mid[col]);
											diag[col]=(((lo_prev[col]+lo_next[col])+hi_prev[col])+hi_next[col]);
										}
										#pragma loop name psinv#0#0#1

										#pragma cetus private(col)
										for (col=1; col<(n1-1); col++)
										{
											out[col]=(((out[col]+(c[0]*mid_mid[col]))+(c[1]*((mid_mid[col-1]+mid_mid[col+1])+edge[col])))+(c[2]*((diag[col]+edge[col-1])+edge[col+1])));
										}
									}
								}
								if (timeron)
								{
									timer_stop(3);
								}

								comm3(u, n1, n2, n3, k);
								if (debug_vec[0]>=1)
								{
									rep_nrm(u, n1, n2, n3, "   psinv", k);
								}
								if (debug_vec[3]>=k)
								{
									showall(u, n1, n2, n3);
								}
							}

							/*
							 * Computes r = v minus a weighted sum of u over each interior point's neighbourhood,
							 * with weights a[0], a[2] and a[3] (a[1] is not read).
							 *
							 * ou, ov and or are viewed as [n3][n2][n1] arrays of double; v and r may be the same
							 * array, as in one of the calls made by mg3P(). After the sweep comm3() is called on r,
							 * and the debug flags may trigger rep_nrm() and showall(). k is forwarded to those calls.
							 */
							static void resid(void * ou, void * ov, void * or, int n1, int n2, int n3, double a[4], int k)
							{
								double (* u)[n2][n1] = (double (* )[n2][n1])ou;
								double (* v)[n2][n1] = (double (* )[n2][n1])ov;
								double (* r)[n2][n1] = (double (* )[n2][n1])or;
								int plane, row, col;
								/* Per-row scratch: sums of u over the four edge and the four diagonal neighbour rows. */
								double edge[((2+(1<<8))+1)], diag[((2+(1<<8))+1)];

								if (timeron)
								{
									timer_start(4);
								}
								#pragma loop name resid#0

								#pragma cetus private(col, row, plane, edge, diag)
								#pragma cetus parallel
								#pragma omp parallel for private(col, row, plane, edge, diag)
								for (plane=1; plane<(n3-1); plane++)
								{
									#pragma loop name resid#0#0

									#pragma cetus private(col, row)
									#pragma cetus lastprivate(edge, diag)
									for (row=1; row<(n2-1); row++)
									{
										/* Rows of u around (plane, row): lo/mid/hi plane, prev/mid/next row. */
										const double *lo_prev=u[plane-1][row-1];
										const double *lo_mid=u[plane-1][row];
										const double *lo_next=u[plane-1][row+1];
										const double *mid_prev=u[plane][row-1];
										const double *mid_mid=u[plane][row];
										const double *mid_next=u[plane][row+1];
										const double *hi_prev=u[plane+1][row-1];
										const double *hi_mid=u[plane+1][row];
										const double *hi_next=u[plane+1][row+1];
										const double *rhs=v[plane][row];
										double *out=r[plane][row];

										#pragma loop name resid#0#0#0

										#pragma cetus private(col)
										for (col=0; col<n1; col++)
										{
											edge[col]=(((mid_prev[col]+mid_next[col])+lo_mid[col])+hi_mid[col]);
											diag[col]=(((lo_prev[col]+lo_next[col])+hi_prev[col])+hi_next[col]);
										}
										#pragma loop name resid#0#0#1

										#pragma cetus private(col)
										for (col=1; col<(n1-1); col++)
										{
											out[col]=(((rhs[col]-(a[0]*mid_mid[col]))-(a[2]*((diag[col]+edge[col-1])+edge[col+1])))-(a[3]*(diag[col-1]+diag[col+1])));
										}
									}
								}
								if (timeron)
								{
									timer_stop(4);
								}

								comm3(r, n1, n2, n3, k);
								if (debug_vec[0]>=1)
								{
									rep_nrm(r, n1, n2, n3, "   resid", k);
								}
								if (debug_vec[2]>=k)
								{
									showall(r, n1, n2, n3);
								}
							}

							#ifdef Initial
							/*
							 * rprj3, capture build (compiled when Initial is defined).
							 *
							 * Fills the interior of s from r using the weighted stencil in the
							 * "Experimental Section" below. On the call where iteration_count equals
							 * iterationNum the inputs of that section are dumped to
							 * "initialInputStateFile" before it runs, and its run time and outputs are
							 * dumped to "initialExperimentalSectionRunTime" and
							 * "initialOutputStateFile" afterwards.
							 *
							 * The body of this function is spread over three preprocessor regions:
							 * this prologue, the shared section between the #ifdef groups, and the
							 * epilogue in the last "#ifdef Initial" region.
							 *
							 * or, os      : the r and s grids, indexed as [i3][i2][i1] with row
							 *               shapes [m2k][m1k] and [m2j][m1j]
							 * m1k/m2k/m3k : extents passed for r (a value of 3 selects offset 2
							 *               instead of 1 in that direction)
							 * m1j/m2j/m3j : extents passed for s
							 * k           : level; k-1 is handed to comm3 and rep_nrm for s
							 */
							static void rprj3(void * or, int m1k, int m2k, int m3k, void * os, int m1j, int m2j, int m3j, int k)
							{
								double (* r)[m2k][m1k] = (double (* )[m2k][m1k])or;
								double (* s)[m2j][m1j] = (double (* )[m2j][m1j])os;
								int j3, j2, j1, i3, i2, i1, d1, d2, d3, j;
								double x1[((2+(1<<8))+1)], y1[((2+(1<<8))+1)], x2, y2;
								if (timeron)
								{
									timer_start(6);
								}
								if (m1k==3)
								{
									d1=2;
								}
								else
								{
									d1=1;
								}
								if (m2k==3)
								{
									d2=2;
								}
								else
								{
									d2=1;
								}
								if (m3k==3)
								{
									d3=2;
								}
								else
								{
									d3=1;
								}
								#pragma experimental section start In=int:d1,int:d2,int:d3,int:i1,int:i2,int:i3,int:j1,int:j2,int:j3,int:m1j,int:m2j,int:m3j,int:rXIndex,int:m2k,int:m1k,double:r:rXIndex*m2k*m1k,int:sXIndex,double:s:sXIndex*m2j*m1j,double:x1:((2+(1<<8))+1),double:x2,double:y1:((2+(1<<8))+1),double:y2,

								/*
								 * Number of planes in the outermost dimension of s and r. The dump
								 * sizes below are XIndex*m2*m1, so this has to be m3j / m3k;
								 * sizeof(**s)/sizeof(double) evaluates to the innermost extent (m1j)
								 * and is only correct when the grid is cubic.
								 */
								int sXIndex= m3j;
								int rXIndex= m3k;
								if (iteration_count==iterationNum) {
									gettimeofday(&captureStartInputWrite, NULL);
									initialInputStateFile= fopen("initialInputStateFile","wb");
									if (initialInputStateFile==NULL) {
										perror("initialInputStateFile");
										exit(1);
									}
									write_var_to_file("d1",&d1, sizeof(int), 1,initialInputStateFile );
									write_var_to_file("d2",&d2, sizeof(int), 1,initialInputStateFile );
									write_var_to_file("d3",&d3, sizeof(int), 1,initialInputStateFile );
									write_var_to_file("i1",&i1, sizeof(int), 1,initialInputStateFile );
									write_var_to_file("i2",&i2, sizeof(int), 1,initialInputStateFile );
									write_var_to_file("i3",&i3, sizeof(int), 1,initialInputStateFile );
									write_var_to_file("j1",&j1, sizeof(int), 1,initialInputStateFile );
									write_var_to_file("j2",&j2, sizeof(int), 1,initialInputStateFile );
									write_var_to_file("j3",&j3, sizeof(int), 1,initialInputStateFile );
									write_var_to_file("m1j",&m1j, sizeof(int), 1,initialInputStateFile );
									write_var_to_file("m2j",&m2j, sizeof(int), 1,initialInputStateFile );
									write_var_to_file("m3j",&m3j, sizeof(int), 1,initialInputStateFile );
									write_var_to_file("rXIndex",&rXIndex, sizeof(int), 1,initialInputStateFile );
									write_var_to_file("m2k",&m2k, sizeof(int), 1,initialInputStateFile );
									write_var_to_file("m1k",&m1k, sizeof(int), 1,initialInputStateFile );
									write_var_to_file("r",r, sizeof(double), rXIndex*m2k*m1k,initialInputStateFile );
									write_var_to_file("sXIndex",&sXIndex, sizeof(int), 1,initialInputStateFile );
									write_var_to_file("s",s, sizeof(double), sXIndex*m2j*m1j,initialInputStateFile );
									write_var_to_file("x1",x1, sizeof(double), ((2+(1<<8))+1),initialInputStateFile );
									write_var_to_file("x2",&x2, sizeof(double), 1,initialInputStateFile );
									write_var_to_file("y1",y1, sizeof(double), ((2+(1<<8))+1),initialInputStateFile );
									write_var_to_file("y2",&y2, sizeof(double), 1,initialInputStateFile );
									/*
									 * Push the capture to disk now so it survives an abnormal exit.
									 * The handle is a global whose ownership is not visible here, so
									 * it is flushed rather than closed.
									 */
									fflush(initialInputStateFile);
									gettimeofday(&captureEndInputWrite, NULL);
									capture_input_write_time = (double) ((captureEndInputWrite.tv_sec * 1000000 + captureEndInputWrite.tv_usec) - (captureStartInputWrite.tv_sec * 1000000 + captureStartInputWrite.tv_usec)) / 1000000;
									if(verbosity > 0){
										printf("The input set of iteration %d is written to the file.\n",iteration_count);
										printf("The capture_input_write_time took %.6lf seconds.\n", capture_input_write_time);
									}
								}else{if(verbosity > 0){printf("The input set of iteration %d is not written to the file.\n",iteration_count);}}
									#endif //Initial

									#ifdef Experimental
									/*
									 * Replay driver (compiled when Experimental is defined).
									 *
									 * Loads the inputs of the experimental section from
									 * "initialInputStateFile", runs and times the section, writes its
									 * outputs to "modifiedOutputStateFile", and compares run time and
									 * outputs against the files recorded by the capture build.
									 */
									int main()
									{
										void * or; int m1k; int m2k; int m3k; void * os; int m1j; int m2j; int m3j; int k;
										int j3, j2, j1, i3, i2, i1, d1, d2, d3, j;
										double x1[((2+(1<<8))+1)], y1[((2+(1<<8))+1)], x2, y2;
										omp_set_num_threads(4);
										gettimeofday(&prgstart, NULL);
										double initial_exp_time_used;
										if(access("initialInputStateFile", F_OK) == 0){
											if(verbosity > 1) {printf("The file to load the initial state of variables exists.\n");}
												initialInputStateFile = fopen("initialInputStateFile", "rb");
										}else{
												printf("The file to load variables' initial states doesn't exist. Comment out #define Experimental and run the original Cetus output file.\n");
												exit(0);
										}
										/* The file can exist and still fail to open (e.g. permissions). */
										if(initialInputStateFile == NULL){
											perror("initialInputStateFile");
											exit(1);
										}
										gettimeofday(&replayStartInputRead, NULL);
										/* Seek to the beginning of the file */
										fseek(initialInputStateFile, 0, SEEK_SET);
										read_var_from_file("d1",&d1, sizeof(int), 1,initialInputStateFile );
										read_var_from_file("d2",&d2, sizeof(int), 1,initialInputStateFile );
										read_var_from_file("d3",&d3, sizeof(int), 1,initialInputStateFile );
										read_var_from_file("i1",&i1, sizeof(int), 1,initialInputStateFile );
										read_var_from_file("i2",&i2, sizeof(int), 1,initialInputStateFile );
										read_var_from_file("i3",&i3, sizeof(int), 1,initialInputStateFile );
										read_var_from_file("j1",&j1, sizeof(int), 1,initialInputStateFile );
										read_var_from_file("j2",&j2, sizeof(int), 1,initialInputStateFile );
										read_var_from_file("j3",&j3, sizeof(int), 1,initialInputStateFile );
										read_var_from_file("m1j",&m1j, sizeof(int), 1,initialInputStateFile );
										read_var_from_file("m2j",&m2j, sizeof(int), 1,initialInputStateFile );
										read_var_from_file("m3j",&m3j, sizeof(int), 1,initialInputStateFile );
										int rXIndex;
										read_var_from_file("rXIndex",&rXIndex, sizeof(int), 1,initialInputStateFile );
										read_var_from_file("m2k",&m2k, sizeof(int), 1,initialInputStateFile );
										read_var_from_file("m1k",&m1k, sizeof(int), 1,initialInputStateFile );
										if(rXIndex <= 0 || m2k <= 0 || m1k <= 0){
											fprintf(stderr, "Invalid dimensions for r in initialInputStateFile.\n");
											exit(1);
										}
										/*
										 * r and s are sized from the capture file. They are placed on the
										 * heap instead of being automatic VLAs so that large grids cannot
										 * exhaust the stack; indexing as r[i3][i2][i1] is unchanged.
										 */
										double (* r)[m2k][m1k] = malloc(((size_t)rXIndex) * sizeof( * r));
										if(r == NULL){
											perror("malloc r");
											exit(1);
										}
										read_var_from_file("r",r, sizeof(double), rXIndex*m2k*m1k,initialInputStateFile );
										int sXIndex;
										read_var_from_file("sXIndex",&sXIndex, sizeof(int), 1,initialInputStateFile );
										if(sXIndex <= 0 || m2j <= 0 || m1j <= 0){
											fprintf(stderr, "Invalid dimensions for s in initialInputStateFile.\n");
											exit(1);
										}
										double (* s)[m2j][m1j] = malloc(((size_t)sXIndex) * sizeof( * s));
										if(s == NULL){
											perror("malloc s");
											exit(1);
										}
										read_var_from_file("s",s, sizeof(double), sXIndex*m2j*m1j,initialInputStateFile );
										read_var_from_file("x1",x1, sizeof(double), ((2+(1<<8))+1),initialInputStateFile );
										read_var_from_file("x2",&x2, sizeof(double), 1,initialInputStateFile );
										read_var_from_file("y1",y1, sizeof(double), ((2+(1<<8))+1),initialInputStateFile );
										read_var_from_file("y2",&y2, sizeof(double), 1,initialInputStateFile );
										/* All inputs are loaded; the capture file is not needed again. */
										fclose(initialInputStateFile);
										gettimeofday(&replayEndInputRead, NULL);
										replay_input_read_time = (double) ((replayEndInputRead.tv_sec * 1000000 + replayEndInputRead.tv_usec) - (replayStartInputRead.tv_sec * 1000000 + replayStartInputRead.tv_usec)) / 1000000;
										if(verbosity > 0){
											printf("The input set of the experimental section is read.\n");
											printf("The replay_input_read_time took %.6lf seconds.\n", replay_input_read_time);
										}
										#endif //Experimental

										gettimeofday(&startexp, NULL);
										//******************START of Experimental Section******************

										#pragma loop name rprj3#0
										#pragma omp parallel for default(shared) private(j1,j2,j3,i1,i2,i3,x1,y1,x2,y2)
										for (j3=1; j3<(m3j-1); j3 ++ )
										{
											i3=((2*j3)-d3);
											#pragma loop name rprj3#0#0
											for (j2=1; j2<(m2j-1); j2 ++ )
											{
												i2=((2*j2)-d2);
												#pragma loop name rprj3#0#0#0
												//#pragma omp parallel for private(i1, j1)
												for (j1=1; j1<m1j; j1 ++ )
												{
													i1=((2*j1)-d1);
													x1[i1]=(((r[i3+1][i2][i1]+r[i3+1][i2+2][i1])+r[i3][i2+1][i1])+r[i3+2][i2+1][i1]);
													y1[i1]=(((r[i3][i2][i1]+r[i3+2][i2][i1])+r[i3][i2+2][i1])+r[i3+2][i2+2][i1]);
												}
												#pragma loop name rprj3#0#0#1
												//#pragma omp parallel for private(i1, j1, x2, y2)
												for (j1=1; j1<(m1j-1); j1 ++ )
												{
													i1=((2*j1)-d1);
													y2=(((r[i3][i2][i1+1]+r[i3+2][i2][i1+1])+r[i3][i2+2][i1+1])+r[i3+2][i2+2][i1+1]);
													x2=(((r[i3+1][i2][i1+1]+r[i3+1][i2+2][i1+1])+r[i3][i2+1][i1+1])+r[i3+2][i2+1][i1+1]);
													s[j3][j2][j1]=((((0.5*r[i3+1][i2+1][i1+1])+(0.25*((r[i3+1][i2+1][i1]+r[i3+1][i2+1][i1+2])+x2)))+(0.125*((x1[i1]+x1[i1+2])+y2)))+(0.0625*(y1[i1]+y1[i1+2])));
												}
											}
										}
										#pragma experimental section stop Out=int:sXIndex,int:m2j,int:m1j,double:s:sXIndex*m2j*m1j,
										//****************** END of Experimental Section ******************
										gettimeofday(&endexp, NULL);
										exp_time_used = (double) ((endexp.tv_sec * 1000000 + endexp.tv_usec) - (startexp.tv_sec * 1000000 + startexp.tv_usec)) / 1000000;
										#ifdef Initial
										if (iteration_count==iterationNum) {
											saved_exp_time_used=exp_time_used;
											FILE* initialExperimentalSectionRunTime= fopen("initialExperimentalSectionRunTime","w");
											if (initialExperimentalSectionRunTime==NULL) {
												perror("initialExperimentalSectionRunTime");
												exit(1);
											}
											write_var_to_file("exp_time_used", &exp_time_used, sizeof(double), 1, initialExperimentalSectionRunTime);
											/* Local handle: nothing else can close it, so close it here. */
											fclose(initialExperimentalSectionRunTime);
										if(verbosity > 0) {printf("The original experimental code section took %.6lf seconds to run.\n", exp_time_used);}
											gettimeofday(&captureStartOutputWrite, NULL);
											initialOutputStateFile = fopen("initialOutputStateFile","wb");
											if (initialOutputStateFile==NULL) {
												perror("initialOutputStateFile");
												exit(1);
											}
											write_var_to_file("sXIndex",&sXIndex, sizeof(int), 1,initialOutputStateFile );
											write_var_to_file("m2j",&m2j, sizeof(int), 1,initialOutputStateFile );
											write_var_to_file("m1j",&m1j, sizeof(int), 1,initialOutputStateFile );
											write_var_to_file("s",s, sizeof(double), sXIndex*m2j*m1j,initialOutputStateFile );
											/* Global handle of unknown ownership: flush, do not close. */
											fflush(initialOutputStateFile);
											gettimeofday(&captureEndOutputWrite, NULL);
											capture_output_write_time = (double) ((captureEndOutputWrite.tv_sec * 1000000 + captureEndOutputWrite.tv_usec) - (captureStartOutputWrite.tv_sec * 1000000 + captureStartOutputWrite.tv_usec)) / 1000000;
											if(verbosity > 0){
												printf("The output set of iteration %d is written to the file.\n",iteration_count);
												printf("The capture_output_write_time took %.6lf seconds.\n\n", capture_output_write_time);
											}
										}else{if(verbosity > 0){printf("The output set of iteration %d is not written to the file.\n",iteration_count);}}
											iteration_count++;
											#endif //Initial
											#ifdef Experimental
											printf("\nComparing the execution time of the experimental section before and after modification:\n");
											printf("The modified experimental section took %.6lf seconds to run.\n", exp_time_used);
											FILE* initialExperimentalSectionRunTime = fopen("initialExperimentalSectionRunTime", "r");
											if(initialExperimentalSectionRunTime == NULL){
												perror("initialExperimentalSectionRunTime");
												exit(1);
											}
											read_var_from_file("initial_exp_time_used",&initial_exp_time_used, sizeof(double), 1, initialExperimentalSectionRunTime);
											fclose(initialExperimentalSectionRunTime);
											printf("The original experimental section (before modifications) took %.6lf seconds to run.\n", initial_exp_time_used);
											if(closeEnough(initial_exp_time_used, exp_time_used)){
												printf("%sThe execution time of the experimental section before and after modification remains the same.%s\n",GRN,RESET);
											}else{
											printf("%sThere is a change of %.6lf seconds between the execution time of the experimental section before and after the modification%s\n",MAG, fabs((initial_exp_time_used) - (exp_time_used)), RESET);}
											FILE* modifiedExperimentalSectionTimeFile = fopen("modifiedExperimentalSectionRunTime", "w");
											if(modifiedExperimentalSectionTimeFile == NULL){
												perror("modifiedExperimentalSectionRunTime");
												exit(1);
											}
											fwrite(&exp_time_used, sizeof(double), 1, modifiedExperimentalSectionTimeFile);
											fclose(modifiedExperimentalSectionTimeFile);
											gettimeofday(&replayStartOutputWriteCompare, NULL);
											modifiedOutputStateFile = fopen("modifiedOutputStateFile","wb");
											if(modifiedOutputStateFile == NULL){
												perror("modifiedOutputStateFile");
												exit(1);
											}
											write_var_to_file("sXIndex",&sXIndex, sizeof(int), 1,modifiedOutputStateFile );
											write_var_to_file("m2j",&m2j, sizeof(int), 1,modifiedOutputStateFile );
											write_var_to_file("m1j",&m1j, sizeof(int), 1,modifiedOutputStateFile );
											write_var_to_file("s",s, sizeof(double), sXIndex*m2j*m1j,modifiedOutputStateFile );
											/*
											 * Close the write stream before the file is read back below and
											 * before the comparison helpers open it by name; otherwise
											 * buffered data may not have reached the file yet.
											 */
											fclose(modifiedOutputStateFile);

											if(verbosity > 0){
												/* Flat view of s: the dumps are printed element by element. */
												const double * sFlat = (const double * )s;
												printf("\n\nValues of output variables before modification:\n");
												initialOutputStateFile= fopen("initialOutputStateFile","rb");
												if(initialOutputStateFile == NULL){
													perror("initialOutputStateFile");
													exit(1);
												}
												read_var_from_file("sXIndex",&sXIndex, sizeof(int), 1,initialOutputStateFile );
												printf("sXIndex=%d \n",sXIndex);
												read_var_from_file("m2j",&m2j, sizeof(int), 1,initialOutputStateFile );
												printf("m2j=%d \n",m2j);
												read_var_from_file("m1j",&m1j, sizeof(int), 1,initialOutputStateFile );
												printf("m1j=%d \n",m1j);
												read_var_from_file("s",s, sizeof(double),sXIndex*m2j*m1j,initialOutputStateFile );
												fclose(initialOutputStateFile);
												for(int loop = 0; loop <sXIndex*m2j*m1j; loop++)
												  		printf("s[%d]=%.6lf\t", loop,sFlat[loop]);
												printf("\n\n");

												printf("\n\nValues of output variables after modification:\n");
												modifiedOutputStateFile= fopen("modifiedOutputStateFile","rb");
												if(modifiedOutputStateFile == NULL){
													perror("modifiedOutputStateFile");
													exit(1);
												}
												read_var_from_file("sXIndex",&sXIndex, sizeof(int), 1,modifiedOutputStateFile );
												printf("sXIndex=%d \n",sXIndex);
												read_var_from_file("m2j",&m2j, sizeof(int), 1,modifiedOutputStateFile );
												printf("m2j=%d \n",m2j);
												read_var_from_file("m1j",&m1j, sizeof(int), 1,modifiedOutputStateFile );
												printf("m1j=%d \n",m1j);
												read_var_from_file("s",s, sizeof(double),sXIndex*m2j*m1j,modifiedOutputStateFile );
												fclose(modifiedOutputStateFile);
												for(int loop = 0; loop <sXIndex*m2j*m1j; loop++)
												  		printf("s[%d]=%.6lf\t", loop,sFlat[loop]);
												printf("\n\n");

											}//end of if
											printf("\n\nComparing the output variables of the experimental section before and after modification:\n");
											/* The helpers take file names; the last argument is the byte offset of the variable. */
											compare_two_int_variables_in_binary_files("initialOutputStateFile","modifiedOutputStateFile","sXIndex", sizeof(int), 1, "int",0);
											compare_two_int_variables_in_binary_files("initialOutputStateFile","modifiedOutputStateFile","m2j", sizeof(int), 1, "int",0+sizeof(int));
											compare_two_int_variables_in_binary_files("initialOutputStateFile","modifiedOutputStateFile","m1j", sizeof(int), 1, "int",0+sizeof(int)+sizeof(int));
											compare_two_double_variables_in_binary_files("initialOutputStateFile","modifiedOutputStateFile","s", sizeof(double), sXIndex*m2j*m1j, "double",0+sizeof(int)+sizeof(int)+sizeof(int));

											gettimeofday(&replayEndOutputWriteCompare, NULL);
											replay_output_write_time = (double) ((replayEndOutputWriteCompare.tv_sec * 1000000 + replayEndOutputWriteCompare.tv_usec) - (replayStartOutputWriteCompare.tv_sec * 1000000 + replayStartOutputWriteCompare.tv_usec)) / 1000000;
											if(verbosity > 0){
												printf("\nThe output set of the experimental section is recorded and compared.\n");
												printf("The replay_output_write_time took %.6lf seconds.\n\n", replay_output_write_time);
											}
											gettimeofday(&prgend, NULL);
											prg_time_used = (double) ((prgend.tv_sec * 1000000 + prgend.tv_usec) - (prgstart.tv_sec * 1000000 + prgstart.tv_usec)) / 1000000;
											printf("\nExecution time of the experimental section in the Replaying Phase: %.6lf (seconds) \n", prg_time_used);
											printf("Overhead of the Replaying Phase is: %.6lf (seconds) \n", prg_time_used-exp_time_used);
											printf("Execution time of the (modified) experimental section: %.6lf (seconds) \n", exp_time_used);
											free(s);
											free(r);
											_ret_val_0=0;
											return _ret_val_0;
										}

										/*
										 * rprj3 as compiled in the Experimental build: the same stencil as the
										 * experimental section above, without any capture or replay code.
										 * Afterwards comm3 is applied to s with level k-1, and norms / a dump
										 * of s are printed when debug_vec[0] / debug_vec[4] request it.
										 */
										static void rprj3(void * or, int m1k, int m2k, int m3k, void * os, int m1j, int m2j, int m3j, int k)
										{
											double (* r)[m2k][m1k] = (double (* )[m2k][m1k])or;
											double (* s)[m2j][m1j] = (double (* )[m2j][m1j])os;
											int j3, j2, j1, i3, i2, i1, d1, d2, d3, j;
											double x1[((2+(1<<8))+1)], y1[((2+(1<<8))+1)], x2, y2;
											if (timeron)
											{
												timer_start(6);
											}
											if ((m1k==3))
											{
												d1=2;
											}
											else
											{
												d1=1;
											}
											if ((m2k==3))
											{
												d2=2;
											}
											else
											{
												d2=1;
											}
											if ((m3k==3))
											{
												d3=2;
											}
											else
											{
												d3=1;
											}
											#pragma experimental section start null
											#pragma loop name rprj3#0

											#pragma cetus private(i1, i2, i3, j1, j2, j3, x2, y2)
											for (j3=1; j3<(m3j-1); j3 ++ )
											{
												i3=((2*j3)-d3);
												#pragma loop name rprj3#0#0

												#pragma cetus private(i1, i2, j1, j2, x2, y2)
												for (j2=1; j2<(m2j-1); j2 ++ )
												{
													i2=((2*j2)-d2);
													#pragma loop name rprj3#0#0#0

													#pragma cetus private(i1, j1)
													#pragma cetus parallel
													#pragma omp parallel for private(i1, j1)
													for (j1=1; j1<m1j; j1 ++ )
													{
														i1=((2*j1)-d1);
														x1[i1]=(((r[(i3+1)][i2][i1]+r[(i3+1)][(i2+2)][i1])+r[i3][(i2+1)][i1])+r[(i3+2)][(i2+1)][i1]);
														y1[i1]=(((r[i3][i2][i1]+r[(i3+2)][i2][i1])+r[i3][(i2+2)][i1])+r[(i3+2)][(i2+2)][i1]);
													}
													#pragma loop name rprj3#0#0#1

													#pragma cetus private(i1, j1, x2, y2)
													#pragma cetus parallel
													#pragma omp parallel for private(i1, j1, x2, y2)
													for (j1=1; j1<(m1j-1); j1 ++ )
													{
														i1=((2*j1)-d1);
														y2=(((r[i3][i2][(i1+1)]+r[(i3+2)][i2][(i1+1)])+r[i3][(i2+2)][(i1+1)])+r[(i3+2)][(i2+2)][(i1+1)]);
														x2=(((r[(i3+1)][i2][(i1+1)]+r[(i3+1)][(i2+2)][(i1+1)])+r[i3][(i2+1)][(i1+1)])+r[(i3+2)][(i2+1)][(i1+1)]);
														s[j3][j2][j1]=((((0.5*r[(i3+1)][(i2+1)][(i1+1)])+(0.25*((r[(i3+1)][(i2+1)][i1]+r[(i3+1)][(i2+1)][(i1+2)])+x2)))+(0.125*((x1[i1]+x1[(i1+2)])+y2)))+(0.0625*(y1[i1]+y1[(i1+2)])));
													}
												}
											}
											#pragma experimental section stop null
											if (timeron)
											{
												timer_stop(6);
											}
											j=(k-1);
											comm3(s, m1j, m2j, m3j, j);
											if ((debug_vec[0]>=1))
											{
												rep_nrm(s, m1j, m2j, m3j, "   rprj3", (k-1));
											}
											if ((debug_vec[4]>=k))
											{
												showall(s, m1j, m2j, m3j);
											}
											return ;
										}

										#endif //Experimental
										#ifdef Initial


										/* Epilogue of the capture-build rprj3 opened in the first #ifdef Initial region. */
										if (timeron)
										{
											timer_stop(6);
										}
										j=(k-1);
										comm3(s, m1j, m2j, m3j, j);
										if (debug_vec[0]>=1)
										{
											rep_nrm(s, m1j, m2j, m3j, "   rprj3", k-1);
										}
										if (debug_vec[4]>=k)
										{
											showall(s, m1j, m2j, m3j);
										}
										return ;
									}

									#endif //Initial
									/*
									 * interp: adds values interpolated from the grid z into the grid u.
									 *
									 * oz, mm1..mm3 : source grid z and its extents (indexed [i3][i2][i1])
									 * ou, n1..n3   : destination grid u and its extents
									 * k            : level used for the debug reports (k-1 for z, k for u)
									 *
									 * When none of n1, n2, n3 equals 3 the first branch is used; otherwise
									 * the second branch runs with per-direction offsets d* and t*.
									 * Timer 7 brackets the work when timeron is set.
									 */
									static void interp(void * oz, int mm1, int mm2, int mm3, void * ou, int n1, int n2, int n3, int k)
									{
										double (* z)[mm2][mm1] = (double (* )[mm2][mm1])oz;
										double (* u)[n2][n1] = (double (* )[n2][n1])ou;
										int i3, i2, i1, d1, d2, d3, t1, t2, t3;
										double z1[((2+(1<<8))+1)], z2[((2+(1<<8))+1)], z3[((2+(1<<8))+1)];
										if (timeron)
										{
											timer_start(7);
										}
										if ((n1!=3) && (n2!=3) && (n3!=3))
										{
											#pragma loop name interp#0

											#pragma cetus private(i1, i2, i3, z1, z2, z3)
											#pragma cetus parallel
											#pragma omp parallel for private(i1, i2, i3, z1, z2, z3)
											for (i3=0; i3<(mm3-1); i3++)
											{
												#pragma loop name interp#0#0

												#pragma cetus private(i1, i2)
												#pragma cetus lastprivate(z1, z2, z3)
												for (i2=0; i2<(mm2-1); i2++)
												{
													/* Partial sums along i2, i3 and both, reused by the loops below. */
													#pragma loop name interp#0#0#0

													#pragma cetus private(i1)
													for (i1=0; i1<mm1; i1++)
													{
														z1[i1] = z[i3][i2+1][i1] + z[i3][i2][i1];
														z2[i1] = z[i3+1][i2][i1] + z[i3][i2][i1];
														z3[i1] = z[i3+1][i2+1][i1] + z[i3+1][i2][i1] + z1[i1];
													}
													#pragma loop name interp#0#0#1

													#pragma cetus private(i1)
													for (i1=0; i1<(mm1-1); i1++)
													{
														u[2*i3][2*i2][2*i1] += z[i3][i2][i1];
														u[2*i3][2*i2][2*i1+1] += 0.5*(z[i3][i2][i1+1] + z[i3][i2][i1]);
													}
													#pragma loop name interp#0#0#2

													#pragma cetus private(i1)
													for (i1=0; i1<(mm1-1); i1++)
													{
														u[2*i3][2*i2+1][2*i1] += 0.5*z1[i1];
														u[2*i3][2*i2+1][2*i1+1] += 0.25*(z1[i1] + z1[i1+1]);
													}
													#pragma loop name interp#0#0#3

													#pragma cetus private(i1)
													for (i1=0; i1<(mm1-1); i1++)
													{
														u[2*i3+1][2*i2][2*i1] += 0.5*z2[i1];
														u[2*i3+1][2*i2][2*i1+1] += 0.25*(z2[i1] + z2[i1+1]);
													}
													#pragma loop name interp#0#0#4

													#pragma cetus private(i1)
													for (i1=0; i1<(mm1-1); i1++)
													{
														u[2*i3+1][2*i2+1][2*i1] += 0.25*z3[i1];
														u[2*i3+1][2*i2+1][2*i1+1] += 0.125*(z3[i1] + z3[i1+1]);
													}
												}
											}
										}
										else
										{
											/* A direction whose u extent is 3 uses offsets (2, 1); others use (1, 0). */
											d1 = (n1==3) ? 2 : 1;
											t1 = (n1==3) ? 1 : 0;
											d2 = (n2==3) ? 2 : 1;
											t2 = (n2==3) ? 1 : 0;
											d3 = (n3==3) ? 2 : 1;
											t3 = (n3==3) ? 1 : 0;
											#pragma loop name interp#1

											#pragma cetus private(i1, i2, i3)
											#pragma cetus parallel
											#pragma omp parallel for private(i1, i2, i3)
											for (i3=d3; i3<mm3; i3++)
											{
												#pragma loop name interp#1#0

												#pragma cetus private(i1, i2)
												for (i2=d2; i2<mm2; i2++)
												{
													#pragma loop name interp#1#0#0

													#pragma cetus private(i1)
													for (i1=d1; i1<mm1; i1++)
													{
														u[2*i3-d3-1][2*i2-d2-1][2*i1-d1-1] += z[i3-1][i2-1][i1-1];
													}
													#pragma loop name interp#1#0#1

													#pragma cetus private(i1)
													for (i1=1; i1<mm1; i1++)
													{
														u[2*i3-d3-1][2*i2-d2-1][2*i1-t1-1] += 0.5*(z[i3-1][i2-1][i1] + z[i3-1][i2-1][i1-1]);
													}
												}
												#pragma loop name interp#1#1

												#pragma cetus private(i1, i2)
												for (i2=1; i2<mm2; i2++)
												{
													#pragma loop name interp#1#1#0

													#pragma cetus private(i1)
													for (i1=d1; i1<mm1; i1++)
													{
														u[2*i3-d3-1][2*i2-t2-1][2*i1-d1-1] += 0.5*(z[i3-1][i2][i1-1] + z[i3-1][i2-1][i1-1]);
													}
													#pragma loop name interp#1#1#1

													#pragma cetus private(i1)
													for (i1=1; i1<mm1; i1++)
													{
														u[2*i3-d3-1][2*i2-t2-1][2*i1-t1-1] += 0.25*(z[i3-1][i2][i1] + z[i3-1][i2-1][i1] + z[i3-1][i2][i1-1] + z[i3-1][i2-1][i1-1]);
													}
												}
											}
											#pragma loop name interp#2

											#pragma cetus private(i1, i2, i3)
											#pragma cetus parallel
											#pragma omp parallel for private(i1, i2, i3)
											for (i3=1; i3<mm3; i3++)
											{
												#pragma loop name interp#2#0

												#pragma cetus private(i1, i2)
												for (i2=d2; i2<mm2; i2++)
												{
													#pragma loop name interp#2#0#0

													#pragma cetus private(i1)
													for (i1=d1; i1<mm1; i1++)
													{
														u[2*i3-t3-1][2*i2-d2-1][2*i1-d1-1] += 0.5*(z[i3][i2-1][i1-1] + z[i3-1][i2-1][i1-1]);
													}
													#pragma loop name interp#2#0#1

													#pragma cetus private(i1)
													for (i1=1; i1<mm1; i1++)
													{
														u[2*i3-t3-1][2*i2-d2-1][2*i1-t1-1] += 0.25*(z[i3][i2-1][i1] + z[i3][i2-1][i1-1] + z[i3-1][i2-1][i1] + z[i3-1][i2-1][i1-1]);
													}
												}
												#pragma loop name interp#2#1

												#pragma cetus private(i1, i2)
												for (i2=1; i2<mm2; i2++)
												{
													#pragma loop name interp#2#1#0

													#pragma cetus private(i1)
													for (i1=d1; i1<mm1; i1++)
													{
														u[2*i3-t3-1][2*i2-t2-1][2*i1-d1-1] += 0.25*(z[i3][i2][i1-1] + z[i3][i2-1][i1-1] + z[i3-1][i2][i1-1] + z[i3-1][i2-1][i1-1]);
													}
													#pragma loop name interp#2#1#1

													#pragma cetus private(i1)
													for (i1=1; i1<mm1; i1++)
													{
														u[2*i3-t3-1][2*i2-t2-1][2*i1-t1-1] += 0.125*(z[i3][i2][i1] + z[i3][i2-1][i1] + z[i3][i2][i1-1] + z[i3][i2-1][i1-1] + z[i3-1][i2][i1] + z[i3-1][i2-1][i1] + z[i3-1][i2][i1-1] + z[i3-1][i2-1][i1-1]);
													}
												}
											}
										}
										if (timeron)
										{
											timer_stop(7);
										}
										if (debug_vec[0]>=1)
										{
											rep_nrm(z, mm1, mm2, mm3, "z: inter", k-1);
											rep_nrm(u, n1, n2, n3, "u: inter", k);
										}
										if (debug_vec[5]>=k)
										{
											showall(z, mm1, mm2, mm3);
											showall(u, n1, n2, n3);
										}
									}

									/*
									 * norm2u3: computes two norms over the interior points of the grid r
									 * (indices 1 .. n-2 in each direction).
									 *
									 * *rnm2 receives sqrt(sum of squares / (nx*ny*nz));
									 * *rnmu receives the largest absolute value found (0.0 if none is larger).
									 * Timer 8 brackets the work when timeron is set.
									 */
									static void norm2u3(void * or, int n1, int n2, int n3, double * rnm2, double * rnmu, int nx, int ny, int nz)
									{
										double (* r)[n2][n1] = (double (* )[n2][n1])or;
										int i3, i2, i1;
										double mag;
										if (timeron)
										{
											timer_start(8);
										}
										const double dn = 1.0*nx*ny*nz;
										double sum_sq = 0.0;
										double max_abs = 0.0;
										#pragma loop name norm2u3#0

										#pragma cetus private(mag, i1, i2, i3)
										for (i3=1; i3<(n3-1); i3++)
										{
											#pragma loop name norm2u3#0#0

											#pragma cetus private(mag, i1, i2)
											for (i2=1; i2<(n2-1); i2++)
											{
												#pragma loop name norm2u3#0#0#0

												#pragma cetus private(mag, i1)
												for (i1=1; i1<(n1-1); i1++)
												{
													const double val = r[i3][i2][i1];
													sum_sq += pow(val, 2.0);
													mag = fabs(val);
													if (mag>max_abs)
													{
														max_abs = mag;
													}
												}
											}
										}
										*rnmu = max_abs;
										*rnm2 = sqrt(sum_sq/dn);
										if (timeron)
										{
											timer_stop(8);
										}
									}

									/*
									 * rep_nrm: prints the two norms that norm2u3 computes for grid u,
									 * labelled with level kk and title. The normalisation sizes are taken
									 * from nx[kk], ny[kk] and nz[kk].
									 */
									static void rep_nrm(void * u, int n1, int n2, int n3, char * title, int kk)
									{
										double l2_norm = 0.0;
										double max_norm = 0.0;

										norm2u3(u, n1, n2, n3, &l2_norm, &max_norm, nx[kk], ny[kk], nz[kk]);
										printf(" Level%2d in %8s: norms =%21.14E%21.14E\n", kk, title, l2_norm, max_norm);
									}

									/*
									 * comm3: fills the boundary layers of grid u with wrapped copies of its
									 * interior, one direction at a time: index 0 gets the value at n-2 and
									 * index n-1 gets the value at 1. The i1 pass covers interior i2/i3 only,
									 * the i2 pass covers all i1, and the i3 pass covers all i1 and i2.
									 * kk is not used here. Timer 9 brackets the work when timeron is set.
									 */
									static void comm3(void * ou, int n1, int n2, int n3, int kk)
									{
										double (* u)[n2][n1] = (double (* )[n2][n1])ou;
										int i1, i2, i3;
										const int last1 = n1-1;
										const int last2 = n2-1;
										const int last3 = n3-1;
										if (timeron)
										{
											timer_start(9);
										}
										#pragma loop name comm3#0

										#pragma cetus private(i2, i3)
										#pragma cetus parallel
										#pragma omp parallel for private(i2, i3)
										for (i3=1; i3<last3; i3++)
										{
											#pragma loop name comm3#0#0

											#pragma cetus private(i2)
											for (i2=1; i2<last2; i2++)
											{
												double * row = u[i3][i2];
												row[0] = row[last1-1];
												row[last1] = row[1];
											}
										}
										#pragma loop name comm3#1

										#pragma cetus private(i1, i3)
										#pragma cetus parallel
										#pragma omp parallel for private(i1, i3)
										for (i3=1; i3<last3; i3++)
										{
											#pragma loop name comm3#1#0

											#pragma cetus private(i1)
											for (i1=0; i1<n1; i1++)
											{
												u[i3][0][i1] = u[i3][last2-1][i1];
												u[i3][last2][i1] = u[i3][1][i1];
											}
										}
										#pragma loop name comm3#2

										#pragma cetus private(i1, i2)
										#pragma cetus parallel
										#pragma omp parallel for private(i1, i2)
										for (i2=0; i2<n2; i2++)
										{
											#pragma loop name comm3#2#0

											#pragma cetus private(i1)
											for (i1=0; i1<n1; i1++)
											{
												u[0][i2][i1] = u[last3-1][i2][i1];
												u[last3][i2][i1] = u[1][i2][i1];
											}
										}
										if (timeron)
										{
											timer_stop(9);
										}
									}

									static void zran3(void * oz, int n1, int n2, int n3, int nx, int ny, int k)
									{
										double (* z)[n2][n1] = (double (* )[n2][n1])oz;
										int i0, m0, m1;
										int i1, i2, i3, d1, e1, e2, e3;
										double xx, x0, x1, a1, a2, ai;
										const int mm = 10;
										const double a = pow(5.0, 13.0);
										const double x = 3.14159265E8;
										double ten[mm][2], best;
										int i, j1[mm][2], j2[mm][2], j3[mm][2];
										int jg[4][mm][2];
										double rdummy;
										a1=power(a, nx);
										a2=power(a, nx*ny);
										zero3(z, n1, n2, n3);
										i=((is1-2)+(nx*((is2-2)+(ny*(is3-2)))));
										ai=power(a, i);
										d1=((ie1-is1)+1);
										e1=((ie1-is1)+2);
										e2=((ie2-is2)+2);
										e3=((ie3-is3)+2);
										x0=x;
										rdummy=randlc( & x0, ai);
										#pragma loop name zran3#0

										#pragma cetus private(i2, i3, rdummy)
										for (i3=1; i3<e3; i3 ++ )
										{
											x1=x0;
											#pragma loop name zran3#0#0

											#pragma cetus private(i2, rdummy)
											for (i2=1; i2<e2; i2 ++ )
											{
												xx=x1;
												vranlc(d1,  & xx, a,  & z[i3][i2][1]);
												rdummy=randlc( & x1, a1);
											}
											rdummy=randlc( & x0, a2);
										}
										#pragma loop name zran3#1

										#pragma cetus private(i)
										#pragma cetus parallel
										#pragma omp parallel for private(i)
										for (i=0; i<mm; i ++ )
										{
											ten[i][1]=0.0;
											j1[i][1]=0;
											j2[i][1]=0;
											j3[i][1]=0;
											ten[i][0]=1.0;
											j1[i][0]=0;
											j2[i][0]=0;
											j3[i][0]=0;
										}
										#pragma loop name zran3#2

										#pragma cetus private(i1, i2, i3)
										for (i3=1; i3<(n3-1); i3 ++ )
										{
											#pragma loop name zran3#2#0

											#pragma cetus private(i1, i2)
											for (i2=1; i2<(n2-1); i2 ++ )
											{
												#pragma loop name zran3#2#0#0

												#pragma cetus private(i1)
												for (i1=1; i1<(n1-1); i1 ++ )
												{
													if (z[i3][i2][i1]>ten[0][1])
													{
														ten[0][1]=z[i3][i2][i1];
														j1[0][1]=i1;
														j2[0][1]=i2;
														j3[0][1]=i3;
														bubble(ten, j1, j2, j3, mm, 1);
													}
													if (z[i3][i2][i1]<ten[0][0])
													{
														ten[0][0]=z[i3][i2][i1];
														j1[0][0]=i1;
														j2[0][0]=i2;
														j3[0][0]=i3;
														bubble(ten, j1, j2, j3, mm, 0);
													}
												}
											}
										}
										i1=(mm-1);
										i0=(mm-1);
										#pragma loop name zran3#3

										#pragma cetus private(best, i)
										for (i=(mm-1); i>=0; i -- )
										{
											best=0.0;
											if (best<ten[i1][1])
											{
												jg[0][i][1]=0;
												jg[1][i][1]=((is1-2)+j1[i1][1]);
												jg[2][i][1]=((is2-2)+j2[i1][1]);
												jg[3][i][1]=((is3-2)+j3[i1][1]);
												i1=(i1-1);
											}
											else
											{
												jg[0][i][1]=0;
												jg[1][i][1]=0;
												jg[2][i][1]=0;
												jg[3][i][1]=0;
											}
											best=1.0;
											if (best>ten[i0][0])
											{
												jg[0][i][0]=0;
												jg[1][i][0]=((is1-2)+j1[i0][0]);
												jg[2][i][0]=((is2-2)+j2[i0][0]);
												jg[3][i][0]=((is3-2)+j3[i0][0]);
												i0=(i0-1);
											}
											else
											{
												jg[0][i][0]=0;
												jg[1][i][0]=0;
												jg[2][i][0]=0;
												jg[3][i][0]=0;
											}
										}
										m1=0;
										m0=0;
										#pragma loop name zran3#4

										#pragma cetus private(i1, i2, i3)
										#pragma cetus parallel
										#pragma omp parallel for private(i1, i2, i3)
										for (i3=0; i3<n3; i3 ++ )
										{
											#pragma loop name zran3#4#0

											#pragma cetus private(i1, i2)
											for (i2=0; i2<n2; i2 ++ )
											{
												#pragma loop name zran3#4#0#0

												#pragma cetus private(i1)
												for (i1=0; i1<n1; i1 ++ )
												{
													z[i3][i2][i1]=0.0;
												}
											}
										}
										#pragma loop name zran3#5

										#pragma cetus private(i)
										for (i=(mm-1); i>=m0; i -- )
										{
											z[jg[3][i][0]][jg[2][i][0]][jg[1][i][0]]=( - 1.0);
										}
										#pragma loop name zran3#6

										#pragma cetus private(i)
										for (i=(mm-1); i>=m1; i -- )
										{
											z[jg[3][i][1]][jg[2][i][1]][jg[1][i][1]]=( + 1.0);
										}
										comm3(z, n1, n2, n3, k);
										return ;
									}

									/*
									 * Dump the leading corner of a 3-D grid to stdout.
									 *
									 * oz is viewed as z[i3][i2][i1], with n1 the fastest-varying extent.
									 * At most 18 (i1) x 14 (i2) x 18 (i3) values are shown: one slab per i3,
									 * one output line per i1 holding the i2 values side by side ("%6.3f"),
									 * and a dashed separator after every slab.
									 */
									static void showall(void * oz, int n1, int n2, int n3)
									{
										double (* z)[n2][n1] = (double (* )[n2][n1])oz;
										int i1, i2, i3;
										/* Printed extents: the grid extents, capped at 18 / 14 / 18. */
										int m1 = n1;
										int m2 = n2;
										int m3 = n3;
										if (m1 > 18)
										{
											m1 = 18;
										}
										if (m2 > 14)
										{
											m2 = 14;
										}
										if (m3 > 18)
										{
											m3 = 18;
										}
										fputs("   \n", stdout);
										#pragma loop name showall#0

										#pragma cetus private(i1, i2, i3)
										for (i3 = 0; i3 < m3; ++i3)
										{
											#pragma loop name showall#0#0

											#pragma cetus private(i1, i2)
											for (i1 = 0; i1 < m1; ++i1)
											{
												#pragma loop name showall#0#0#0

												#pragma cetus private(i2)
												for (i2 = 0; i2 < m2; ++i2)
												{
													printf("%6.3f", z[i3][i2][i1]);
												}
												fputs("\n", stdout);
											}
											fputs("  - - - - - - - \n", stdout);
										}
										fputs("   \n", stdout);
									}

									/*
									 * Square-and-multiply over the binary digits of n, using randlc() for
									 * every multiplication step.
									 *
									 * The accumulator starts at 1.0; for each set low bit of the remaining
									 * exponent it is multiplied by the current base, and the base is squared
									 * on every iteration.  Returns the accumulator.
									 */
									static double power(double a, int n)
									{
										double result = 1.0;
										double base = a;
										int remaining;
										for (remaining = n; remaining != 0; remaining /= 2)
										{
											if ((remaining % 2) == 1)
											{
												/* randlc() updates its first argument in place; its return value is not needed. */
												(void)randlc(&result, base);
											}
											(void)randlc(&base, base);
										}
										return result;
									}

									/*
									 * Exchange slots lo and lo+1 of column ind in the value table and in the
									 * three parallel index tables.
									 */
									static void bubble_swap_slots(double ten[][2], int j1[][2], int j2[][2], int j3[][2], int lo, int ind)
									{
										const int hi = lo + 1;
										const double held_value = ten[hi][ind];
										int held_index;

										ten[hi][ind] = ten[lo][ind];
										ten[lo][ind] = held_value;

										held_index = j1[hi][ind];
										j1[hi][ind] = j1[lo][ind];
										j1[lo][ind] = held_index;

										held_index = j2[hi][ind];
										j2[hi][ind] = j2[lo][ind];
										j2[lo][ind] = held_index;

										held_index = j3[hi][ind];
										j3[hi][ind] = j3[lo][ind];
										j3[lo][ind] = held_index;
									}

									/*
									 * Sift the entry in slot 0 of column ind towards higher slots.
									 *
									 * ind == 1: the entry moves up while it is greater than its successor.
									 * otherwise: the entry moves up while it is less than its successor.
									 * Sifting stops at the first pair already in order, or after slot m-1 is
									 * reached.  j1, j2 and j3 are permuted together with ten.
									 */
									static void bubble(double ten[][2], int j1[][2], int j2[][2], int j3[][2], int m, int ind)
									{
										int i;
										if (ind == 1)
										{
											#pragma loop name bubble#0

											for (i = 0; (i < (m - 1)) && (ten[i][ind] > ten[i + 1][ind]); ++i)
											{
												bubble_swap_slots(ten, j1, j2, j3, i, ind);
											}
										}
										else
										{
											#pragma loop name bubble#1

											for (i = 0; (i < (m - 1)) && (ten[i][ind] < ten[i + 1][ind]); ++i)
											{
												bubble_swap_slots(ten, j1, j2, j3, i, ind);
											}
										}
									}

									/*
									 * Store 0.0 into every element of an n3 x n2 x n1 grid.
									 *
									 * oz is viewed as z[i3][i2][i1] (n1 fastest-varying).  The outermost
									 * (i3) loop carries the OpenMP work-sharing directive.
									 */
									static void zero3(void * oz, int n1, int n2, int n3)
									{
										double (* z)[n2][n1] = (double (* )[n2][n1])oz;
										int i1, i2, i3;
										#pragma loop name zero3#0

										#pragma cetus private(i1, i2, i3)
										#pragma cetus parallel
										#pragma omp parallel for private(i1, i2, i3)
										for (i3 = 0; i3 < n3; ++i3)
										{
											#pragma loop name zero3#0#0

											#pragma cetus private(i1, i2)
											for (i2 = 0; i2 < n2; ++i2)
											{
												/* Contiguous run of n1 values for this (i3, i2) pair. */
												double * row = z[i3][i2];
												#pragma loop name zero3#0#0#0

												#pragma cetus private(i1)
												for (i1 = 0; i1 < n1; ++i1)
												{
													row[i1] = 0.0;
												}
											}
										}
									}

									/*
									 * Store the current wall-clock time, in seconds, into *t.
									 *
									 * The value is measured from the whole-second part of the clock reading
									 * taken on the first call, so the first result lies in [0, 1) and later
									 * results grow from there.
									 *
									 * The reference second is kept as a time_t with a separate "captured"
									 * flag.  Keeping it in an int with -1 as the "unset" marker would truncate
									 * tv_sec where time_t is wider than int, and a truncated negative value
									 * would make every call re-capture the base.
									 */
									void wtime(double * t)
									{
										static time_t base_sec;
										static int base_captured = 0;
										struct timeval tv;
										gettimeofday( & tv, NULL);
										if ( ! base_captured)
										{
											base_sec=tv.tv_sec;
											base_captured=1;
										}
										( * t)=((double)(tv.tv_sec-base_sec)+(1.0E-6*(double)tv.tv_usec));
										return ;
									}

									/* Value-returning wrapper around wtime(): returns what wtime() stores. */
									static double elapsed_time(void )
									{
										double now;
										wtime(&now);
										return now;
									}

									/* Timer bookkeeping, one slot per timer id (64 slots each). */
									static double start[64];    /* clock reading stored by timer_start() */
									static double elapsed[64];  /* time accumulated by timer_stop() */
									/* Reset the accumulated time of timer n to zero.  n is used unchecked as an index. */
									void timer_clear(int n)
									{
										elapsed[n] = 0.0;
									}

									/* Record the current clock reading as the start mark of timer n. */
									void timer_start(int n)
									{
										const double now = elapsed_time();
										start[n] = now;
									}

									/*
									 * Add the time passed since the start mark of timer n to its accumulated
									 * total.  The start mark itself is left unchanged.
									 */
									void timer_stop(int n)
									{
										const double delta = elapsed_time() - start[n];
										elapsed[n] += delta;
									}

									/* Return the time accumulated so far by timer n. */
									double timer_read(int n)
									{
										return elapsed[n];
									}

									/*
									 * One step of a multiplicative congruential generator carried out in
									 * double arithmetic.
									 *
									 * Both the multiplier a and the seed *x are split into a high part and a
									 * low 23-bit part (hi * 2^23 + lo), so that every partial product stays
									 * small enough to be held exactly in a double.  For non-negative integral
									 * inputs below 2^46 this yields  *x <- (a * *x) mod 2^46.
									 *
									 * On return *x holds the new seed; the function result is that seed
									 * scaled by 2^-46.
									 */
									double randlc(double * x, double a)
									{
										const double two_m23 = 1.1920928955078125E-7;  /* 2^-23 */
										const double two_p23 = 8388608.0;              /* 2^23  */
										const double two_m46 = two_m23 * two_m23;
										const double two_p46 = two_p23 * two_p23;

										/* a = 2^23 * a_hi + a_lo */
										const double a_hi = (int)(two_m23 * a);
										const double a_lo = a - (two_p23 * a_hi);

										/* *x = 2^23 * x_hi + x_lo */
										const double x_hi = (int)(two_m23 * (*x));
										const double x_lo = (*x) - (two_p23 * x_hi);

										/* Cross terms; only their remainder modulo 2^23 (z) is carried on. */
										const double cross = (a_hi * x_lo) + (a_lo * x_hi);
										const double cross_hi = (int)(two_m23 * cross);
										const double z = cross - (two_p23 * cross_hi);

										/* Recombine and drop everything at or above 2^46. */
										const double combined = (two_p23 * z) + (a_lo * x_lo);
										const double combined_hi = (int)(two_m46 * combined);

										*x = combined - (two_p46 * combined_hi);
										return two_m46 * (*x);
									}

									/*
									 * Generate n successive values of the same recurrence randlc() applies,
									 * writing them to y[0..n-1].
									 *
									 * Each iteration advances the seed *x in place (split-multiply with
									 * 23-bit halves, result reduced below 2^46) and stores the new seed
									 * scaled by 2^-46.  With n <= 0 neither *x nor y is touched.
									 */
									void vranlc(int n, double * x, double a, double y[])
									{
										const double r23 = 1.1920928955078125E-7;  /* 2^-23 */
										const double t23 = 8388608.0;              /* 2^23  */
										const double r46 = r23 * r23;
										const double t46 = t23 * t23;
										/* Multiplier split once, outside the loop: a = t23 * a_hi + a_lo. */
										const double a_hi = (int)(r23 * a);
										const double a_lo = a - (t23 * a_hi);
										double t1, t2, t3, t4, x1, x2, z;
										int i;
										#pragma loop name vranlc#0

										#pragma cetus private(i, t1, t2, t3, t4, x1, x2, z)
										for (i = 0; i < n; ++i)
										{
											/* Split the current seed: *x = t23 * x1 + x2. */
											t1 = r23 * (*x);
											x1 = (int)t1;
											x2 = (*x) - (t23 * x1);

											/* Cross terms, reduced modulo 2^23 into z. */
											t1 = (a_hi * x2) + (a_lo * x1);
											t2 = (int)(r23 * t1);
											z = t1 - (t23 * t2);

											/* Recombine, reduce below 2^46, store seed and scaled output. */
											t3 = (t23 * z) + (a_lo * x2);
											t4 = (int)(r46 * t3);
											*x = t3 - (t46 * t4);
											y[i] = r46 * (*x);
										}
									}

									/*
									 * Print the end-of-run report to stdout.
									 *
									 * name         benchmark name; a name starting with "EP" selects the
									 *              2^n1 size format when n2 and n3 are both 0
									 * class        problem class character
									 * n1, n2, n3   problem size; printed as n1 x n2 x n3 unless n2 == n3 == 0
									 * niter        iteration count
									 * t            run time in seconds
									 * mops         Mop/s total
									 * optype       operation type label
									 * verified     non-zero prints SUCCESSFUL, zero prints UNSUCCESSFUL
									 * npbversion, compiletime, cs1..cs7
									 *              version, compile date and build-setting strings, printed
									 *              verbatim
									 *
									 * The 2^n1 size is formatted with a bounded snprintf(): the text buffer
									 * holds 15 characters plus the terminator, and a larger value is truncated
									 * instead of being written past the end of the buffer.
									 */
									void print_results(char * name, char class, int n1, int n2, int n3, int niter, double t, double mops, char * optype, logical verified, char * npbversion, char * compiletime, char * cs1, char * cs2, char * cs3, char * cs4, char * cs5, char * cs6, char * cs7)
									{
										char size[16];
										int j;
										printf("\n\n %s Benchmark Completed.\n", name);
										printf(" Class           =             %12c\n", class);
										if ((n2==0)&&(n3==0))
										{
											if ((name[0]=='E')&&(name[1]=='P'))
											{
												/* Bounded write: at most sizeof(size)-1 characters plus '\0'. */
												(void)snprintf(size, sizeof(size), "%15.0lf", pow(2.0, n1));
												/* Blank out a trailing '.' if the formatter produced one. */
												j=14;
												if (size[j]=='.')
												{
													size[j]=' ';
													j -- ;
												}
												size[j+1]='\0';
												printf(" Size            =          %15s\n", size);
											}
											else
											{
												printf(" Size            =             %12d\n", n1);
											}
										}
										else
										{
											printf(" Size            =           %4dx%4dx%4d\n", n1, n2, n3);
										}
										printf(" Iterations      =             %12d\n", niter);
										printf(" Time in seconds =             %12.2lf\n", t);
										printf(" Mop/s total     =          %15.2lf\n", mops);
										printf(" Operation type  = %24s\n", optype);
										if (verified)
										{
											printf(" Verification    =             %12s\n", "SUCCESSFUL");
										}
										else
										{
											printf(" Verification    =             %12s\n", "UNSUCCESSFUL");
										}
										printf(" Version         =             %12s\n", npbversion);
										printf(" Compile date    =             %12s\n", compiletime);
										printf("\n Compile options:\n""    CC           = %s\n", cs1);
										printf("    CLINK        = %s\n", cs2);
										printf("    C_LIB        = %s\n", cs3);
										printf("    C_INC        = %s\n", cs4);
										printf("    CFLAGS       = %s\n", cs5);
										printf("    CLINKFLAGS   = %s\n", cs6);
										printf("    RAND         = %s\n", cs7);
										printf("\n--------------------------------------\n"" Please send all errors/feedbacks to:\n"" Center for Manycore Programming\n"" cmp@aces.snu.ac.kr\n"" http://aces.snu.ac.kr\n""--------------------------------------\n\n");
										return ;
									}
